# Load library
library(Seurat)
library(princurve)
library(monocle)
library(FateID)
library(parallel)
library(dplyr)
library(reshape2)
library(ggplot2)
library(cowplot)
library(ggExtra)
library(patchwork)
library(RColorBrewer)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())colors <- c("#969696",
tolower(c("#68B041", "#E3C148", "#B7D174", "#83C3B8", "#009FDA", "#3E69AC", "#E46B6B")),
"#ec756d", "#c773a7", "#7293c8", "#b79f0b", "#3ca73f","#31b6bd",
"#ebcb2e", "#9ec22f", "#a9961b", "#cc3a1b", "#cc8778" , "#d14c8d", "#4cabdc", "#5ab793", "#e7823a","#e6bb9b", "#046c9a", "#4784a2" , "#4990c9")
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 2,
no.legend = F,
cols.use = colors)We perform Kmeans clustering on the 4 cell state scores :
SPPalSP.BPPal.BP#Calculate Pallial and Sub-pallial BP scores
Pal.BP.genes <- c("Eomes", "Neurog2", "Neurog1", "Prmt8", "Nrp1")
genes.list <- list(Pal.BP.genes)
enrich.name <- "Pal.BP_signature"
Allcells.data <- AddModuleScore(Allcells.data, genes.list = genes.list, genes.pool = NULL, n.bin = 5,
seed.use = 1, ctrl.size = length(genes.list), use.k = FALSE, enrich.name = enrich.name ,
random.seed = 1)
SP.BP.genes <- c("Dlx1", "Dlx2", "Dlx5","Ascl1", "Gsx2")
genes.list <- list(SP.BP.genes)
enrich.name <- "SP.BP_signature"
Allcells.data <- AddModuleScore(Allcells.data, genes.list = genes.list, genes.pool = NULL, n.bin = 5,
seed.use = 1, ctrl.size = length(genes.list), use.k = FALSE, enrich.name = enrich.name ,
random.seed = 1)set.seed(100)
# Run Kmeans clustering
cl <- kmeans(cbind(Allcells.data@meta.data$SP_signature1,
Allcells.data@meta.data$Pal_signature1,
Allcells.data@meta.data$SP.BP_signature1,
Allcells.data@meta.data$Pal.BP_signature1), 4)
Allcells.data@meta.data$kmeanClust <- paste0("Clust.",cl$cluster)col.pal <- wes_palette("GrandBudapest1", 4, type = "discrete")
p1 <- ggplot(Allcells.data@meta.data, aes(x=SP_signature1, y=Pal_signature1, colour = kmeanClust)) +
scale_color_manual(values=col.pal) +
geom_point() +
theme(legend.position="none")
ggMarginal(p1, type = "histogram", fill="lightgrey")DimPlot(Allcells.data,
group.by = "kmeanClust",
reduction.use = "spring",
cols.use = col.pal,
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
no.legend = F)We then extract the Pallial cells branche. We also excludes CR cells cluster form the trajectory inference.
#Remove the sub-pallial cells branche
MeanKclust.SPscore <- aggregate(SP_signature1 ~ kmeanClust, Allcells.data@meta.data, mean)
SPclust <- MeanKclust.SPscore %>% filter(SP_signature1 == max(SP_signature1)) %>% pull(kmeanClust)
SP.cells <- Allcells.data@meta.data %>% filter(kmeanClust == SPclust) %>% pull(Barcodes)
# Remove cells not use for trajectory inference (Subpallial cells defined by kmeans, Late GABA neurons and CR cells)
Excluded.clusters <- Allcells.data@meta.data %>%
filter(Cluster.ident %in% grep("*Sub|GABA|LN.Glut.13|LN.Glut.14|LN.Glut.1$", unique(as.character(Allcells.data@ident)), value = T)) %>%
pull(Barcodes)
# We further keep only pallial apical progenitor cluster among other AP clusters
MeanKclust.APscore <- aggregate(AP_signature1 ~ kmeanClust, Allcells.data@meta.data, mean)
APclust <- MeanKclust.APscore %>% filter(AP_signature1 == max(AP_signature1)) %>% pull(kmeanClust)
All.AP <- Allcells.data@meta.data %>% filter(kmeanClust == APclust) %>% pull(Barcodes)
Valide.AP <- Allcells.data@meta.data %>% filter(Cluster.ident %in% grep("Dorsal.Pallium|lateral.Pallium.1|lateral.Pallium.2|Ventral.Pallium",
unique(as.character(Allcells.data@ident)), value = T)) %>% pull(Barcodes)
filtered.AP <- All.AP[!All.AP %in% Valide.AP]
#Remove all invalide cells + 3 pallial outlier cells
Cells.to.use <- rownames(Allcells.data@meta.data)[!rownames(Allcells.data@meta.data) %in% unique(c(SP.cells, Excluded.clusters, filtered.AP, c("ATTTCTGCACGGCCAT" , "CAAGTTGCAAGCCTAT", "CTACATTGTAGCTGCC")))]
Allcells.data <- SubsetData(Allcells.data, cells.use = Cells.to.use, subset.raw = T, do.clean = F)colors <- c("#969696",
tolower(c("#68B041", "#E3C148", "#B7D174", "#E46B6B")),
"#cc3a1b", "#cc8778" , "#d14c8d", "#4cabdc", "#5ab793", "#e7823a","#e6bb9b", "#046c9a", "#4784a2" , "#4990c9")
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 2,
no.legend = T,
cols.use = colors)# Filter genes
num.cells <- Matrix::rowSums(Allcells.data@raw.data > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 20)])
Allcells.data@raw.data <- Allcells.data@raw.data[genes.use, ]
# Normalization and variable gene selection
Allcells.data <- NormalizeData(object = Allcells.data,
normalization.method = "LogNormalize",
scale.factor = round(median(Allcells.data@meta.data$nUMI)),
display.progress = F)
Allcells.data <- FindVariableGenes(object = Allcells.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.03,
x.high.cutoff = 4,
y.cutoff = 2, do.plot = F,
display.progress = F)
length(Allcells.data@var.genes)## [1] 492
# FataID requieres the terminal clusters ID to be integers
TerminalFates <- grep("16|24|22|19|26|20|21", unique(as.character(Allcells.data@ident)), value = T)
Allcells.data@meta.data$NewClusterID <- sapply(as.character(Allcells.data@ident),
function(x) if(x == TerminalFates[1]){x=1}
else if(x== TerminalFates[2]){x=2}
else if(x== TerminalFates[3]){x=3}
else if(x== TerminalFates[4]){x=4}
else if(x== TerminalFates[5]){x=5}
else if(x== TerminalFates[6]){x=6}
else if(x== TerminalFates[7]){x=7}
else{x=x})
Allcells.data <- SetAllIdent(Allcells.data, id = "NewClusterID")colors <- c("#cc8778" , "#d14c8d", "#4cabdc", "#5ab793", "#e7823a","#e6bb9b","#cc3a1b",
"#969696",
tolower(c("#68B041", "#E3C148", "#B7D174", "#E46B6B")),
"#046c9a", "#4784a2" , "#4990c9")
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
no.legend = T,
cols.use = colors)We restricted the analysis to the most variable genes as dertermined by the Seurat function “FindVariableGenes” excluding cell cylce phase genes
# Exclude cell cycle associated genes
CCgenes <- as.character(read.table("./Progenitors/CellCycleGenes.csv", sep = "\t", header = F)[,1])
Input.genes <- Allcells.data@var.genes[!Allcells.data@var.genes %in% CCgenes]
# FateID requieres the expression matrix to be a dataframe
Norm.Mat <- as.data.frame(as.matrix(Allcells.data@data[row.names(Allcells.data@data) %in% Input.genes,]))
# Set a cluster assignment factor for each cells
ClusterIdent <- as.character(Allcells.data@ident)
names(ClusterIdent) <- names(Allcells.data@ident)
# Finaly FateID requires to specify the identity of the attractor state clusters
Attractors <- 1:7Infered.Fate.bias <- fateBias(Norm.Mat, ClusterIdent, Attractors,
z = 1-cor(Norm.Mat, method = "spearman"),
minnr=10,
minnrh=30,
adapt=TRUE,
confidence=0.95,
nbfactor=5,
use.dist=FALSE,
seed=1234,
nbtree=NULL)## minnr: 10
## minnrh: 30
## test set size iteration 1 : 10 10 10 10 10 10 10
## randomforest iteration 1 of 50 cells
## test set size iteration 2 : 10 10 10 10 10 10 10
## randomforest iteration 2 of 62 cells
## test set size iteration 3 : 10 10 10 10 10 10 10
## randomforest iteration 3 of 53 cells
## test set size iteration 4 : 10 10 10 10 10 10 10
## randomforest iteration 4 of 55 cells
## test set size iteration 5 : 10 10 10 10 10 10 10
## randomforest iteration 5 of 49 cells
## test set size iteration 6 : 10 10 10 10 10 10 10
## randomforest iteration 6 of 48 cells
## test set size iteration 7 : 10 10 10 10 10 10 10
## randomforest iteration 7 of 54 cells
## test set size iteration 8 : 10 10 10 10 10 10 10
## randomforest iteration 8 of 49 cells
## test set size iteration 9 : 10 10 10 10 10 10 10
## randomforest iteration 9 of 50 cells
## test set size iteration 10 : 10 10 10 10 10 10 10
## randomforest iteration 10 of 51 cells
## test set size iteration 11 : 10 10 10 10 10 10 10
## randomforest iteration 11 of 52 cells
## test set size iteration 12 : 10 10 10 10 10 10 10
## randomforest iteration 12 of 44 cells
## test set size iteration 13 : 10 10 10 10 10 10 10
## randomforest iteration 13 of 45 cells
## test set size iteration 14 : 10 10 10 10 10 10 10
## randomforest iteration 14 of 42 cells
## test set size iteration 15 : 10 10 10 10 10 10 10
## randomforest iteration 15 of 43 cells
## test set size iteration 16 : 10 10 10 10 10 10 10
## randomforest iteration 16 of 40 cells
## test set size iteration 17 : 10 10 10 10 10 10 10
## randomforest iteration 17 of 40 cells
## test set size iteration 18 : 10 10 10 10 10 10 10
## randomforest iteration 18 of 37 cells
## test set size iteration 19 : 10 10 10 10 10 10 10
## randomforest iteration 19 of 42 cells
## test set size iteration 20 : 10 10 10 10 10 10 10
## randomforest iteration 20 of 43 cells
## test set size iteration 21 : 10 10 10 10 10 10 10
## randomforest iteration 21 of 48 cells
## test set size iteration 22 : 10 10 10 10 10 10 10
## randomforest iteration 22 of 47 cells
## test set size iteration 23 : 10 10 10 10 10 10 10
## randomforest iteration 23 of 50 cells
## test set size iteration 24 : 10 10 10 10 10 10 10
## randomforest iteration 24 of 52 cells
## test set size iteration 25 : 10 10 10 10 10 10 10
## randomforest iteration 25 of 53 cells
## test set size iteration 26 : 10 10 10 10 10 10 10
## randomforest iteration 26 of 57 cells
## test set size iteration 27 : 10 10 10 10 10 10 10
## randomforest iteration 27 of 54 cells
## test set size iteration 28 : 10 10 10 10 10 10 10
## randomforest iteration 28 of 49 cells
## test set size iteration 29 : 10 10 10 10 10 10 10
## randomforest iteration 29 of 54 cells
## test set size iteration 30 : 10 10 10 10 10 10 10
## randomforest iteration 30 of 51 cells
## test set size iteration 31 : 10 10 10 10 10 10 10
## randomforest iteration 31 of 50 cells
## test set size iteration 32 : 10 10 10 10 10 10 10
## randomforest iteration 32 of 55 cells
## test set size iteration 33 : 5 5 5 5 5 5 10
## randomforest iteration 33 of 36 cells
## test set size iteration 34 : 10 10 10 10 10 10 10
## randomforest iteration 34 of 56 cells
## test set size iteration 35 : 10 10 10 10 10 10 10
## randomforest iteration 35 of 55 cells
## test set size iteration 36 : 10 10 10 10 10 10 10
## randomforest iteration 36 of 55 cells
## test set size iteration 37 : 10 10 10 10 10 10 10
## randomforest iteration 37 of 54 cells
## test set size iteration 38 : 10 10 10 10 10 10 10
## randomforest iteration 38 of 56 cells
## test set size iteration 39 : 10 10 10 10 10 10 10
## randomforest iteration 39 of 58 cells
## test set size iteration 40 : 10 10 10 10 10 10 10
## randomforest iteration 40 of 47 cells
## test set size iteration 41 : 10 10 10 10 10 10 10
## randomforest iteration 41 of 44 cells
## test set size iteration 42 : 10 10 10 10 10 10 10
## randomforest iteration 42 of 30 cells
## test set size iteration 43 : 10 10 10 10 10 10 10
## randomforest iteration 43 of 18 cells
## test set size iteration 44 : 10 10 10 10 10 10 10
## randomforest iteration 44 of 2 cells
Allcells.data@meta.data$FateID.iteration <- "0"
Allcells.data <- SetAllIdent(Allcells.data, id = "FateID.iteration")
for (i in seq(0, length(Infered.Fate.bias$rfl), by = 5)[-1]) {
iter <- seq(i-4,i)
Barcodes <- c()
for (j in iter) {
Barcodes <- c(Barcodes, names(Infered.Fate.bias$rfl[[j]]$predicted))
}
Allcells.data <- SetIdent(Allcells.data, cells.use = Barcodes, ident.use = paste0("iter ",iter[1]," to ", iter[4]))
}colors <- c("#d14c8d", "#cc3a1b", "#046c9a", "#e7823a", "#cc8778", "#68b041", "#5ab793", "#e3c148", "#e46B6b", "#b79f0b")
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,dim.2 = 2,
do.label=T,
label.size = 3,
no.legend = F,
cols.use = c("#dfdfdf", colors))Allcells.data@meta.data$FateID.iteration <- "0"
Allcells.data <- SetAllIdent(Allcells.data, id = "FateID.iteration")
for (i in seq(0, length(Infered.Fate.bias$rfl), by = 5)[-1]) {
iter <- seq(i-4,i)
Barcodes <- c()
for (j in iter) {
Barcodes <- c(Barcodes, names(Infered.Fate.bias$rfl[[j]]$test$predicted))
}
Allcells.data <- SetIdent(Allcells.data, cells.use = Barcodes, ident.use = paste0("iter ",iter[1]," to ", iter[4]))
}DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,dim.2 = 2,
do.label=T,
label.size = 3,
no.legend = F,
cols.use = c("#dfdfdf", colors))probs <- Infered.Fate.bias$probs[,seq(length(Attractors))]
Allcells.data@meta.data$prob.Nr4a2 <- probs$t1
Allcells.data@meta.data$prob.Foxp2c <- probs$t2
Allcells.data@meta.data$prob.Ppp1r14c <- probs$t3
Allcells.data@meta.data$prob.Fezf1 <- probs$t4
Allcells.data@meta.data$prob.Foxp2a <- probs$t5
Allcells.data@meta.data$prob.Foxp2b <- probs$t6
Allcells.data@meta.data$prob.Nfix <- probs$t7FeaturePlot(object = Allcells.data,
features.plot = c("prob.Nr4a2", "prob.Nfix",
"prob.Ppp1r14c","prob.Fezf1",
"prob.Foxp2a", "prob.Foxp2b", "prob.Foxp2c"),
cols.use = rev(RColorBrewer::brewer.pal(n = 11, name = "Spectral")),
reduction.use = "spring",
no.legend = T)Manuscript Fig. 7A and S7A
Allcells.data@meta.data$Nr4a2.biase <- ifelse(Allcells.data@meta.data$prob.Nr4a2 > 0.5 & abs(Allcells.data@meta.data$prob.Nfix- Allcells.data@meta.data$prob.Nr4a2) > 0.25, "Nr4a2.lineage", "Other.lineages" )
table(Allcells.data@meta.data$Nr4a2.biase)##
## Nr4a2.lineage Other.lineages
## 617 1682
Allcells.data@meta.data$Nfix.biase <- ifelse(Allcells.data@meta.data$prob.Nfix > 0.5 & abs(Allcells.data@meta.data$prob.Nfix- Allcells.data@meta.data$prob.Nr4a2) > 0.25, "Nfix.lineage", "Other.lineages" )
table(Allcells.data@meta.data$Nfix.biase)##
## Nfix.lineage Other.lineages
## 520 1779
p1 <- DimPlot(Allcells.data,
group.by = "Nr4a2.biase",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
cols.use = c("#026c9a","#dfdfdf"),
no.legend = T,
do.return = T)
p2 <- DimPlot(Allcells.data,
group.by = "Nfix.biase",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
cols.use = c("#cc391b","#dfdfdf"),
no.legend = T,
do.return = T)
p1 + p2# Fate biase in progenitor domaines
data <- melt(data.frame(Clusters = Allcells.data@meta.data$Cluster.ident,
Nfix.score = Allcells.data@meta.data$prob.Nfix,
Nr4a2.score = Allcells.data@meta.data$prob.Nr4a2))
colnames(data) <- c("Clusters", "lineage", "Score")data %>%
filter(Clusters %in% c("AP.Ventral.Pallium", "AP.lateral.Pallium.1", "AP.lateral.Pallium.2", "AP.Dorsal.Pallium")) %>%
mutate(Clusters = factor(Clusters, levels =c("AP.Ventral.Pallium", "AP.lateral.Pallium.1", "AP.lateral.Pallium.2", "AP.Dorsal.Pallium"))) %>%
ggplot(aes(x=factor(lineage, levels = c("Nr4a2.score", "Nfix.score") ), y=Score, fill= Clusters)) +
geom_boxplot(notch=TRUE) +
geom_point(position=position_jitterdodge(jitter.width = 0.2), size = .5, aes(group=Clusters)) +
scale_fill_manual(values= c("#68b041", "#e3c148", "#b7d174", "#e46b6b")) +
xlab("")#Subset dataset to keep only cells which showed sufficient difference of fate probability
VP.DP.lineage.cells <- c(rownames(subset(Allcells.data@meta.data, Allcells.data@meta.data$Nr4a2.biase == "Nr4a2.lineage")),
rownames(subset(Allcells.data@meta.data, Allcells.data@meta.data$Nfix.biase == "Nfix.lineage")))
# We further only retain VP and DP progenitor clusters
Allcells.data <- SubsetData(Allcells.data,
cells.use = VP.DP.lineage.cells,
ident.remove = c("AP.lateral.Pallium.1", "AP.lateral.Pallium.2") ,
subset.raw = T,
do.clean = F)
DimPlot(Allcells.data,
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
label.size = 4,
cols.use = c( "#dfdfdf","#68b041", "#e46b6b","#cc3a1b","#cc8778","#e6bb9b" ,"#046c9a","#4784a2"),
no.legend = T)Allcells.data@meta.data$Lineage <- ifelse(Allcells.data@meta.data$Nr4a2.biase == "Nr4a2.lineage", "Nr4a2.lineage", "Nfix.lineage")
DimPlot(Allcells.data,
group.by = "Lineage",
reduction.use = "spring",
dim.1 = 1,
dim.2 = 2,
do.label=T,
cols.use = c("#cc391b","#026c9a"),
label.size = 4,
no.legend = F)Manuscript Fig. 7B
We decided to use spring space dimensionality reduction to fit the principale curve because it has been calculated on all cells together. Thus reflecting pan neuronal differentiation axis of varation.
Nr4a2.lineage.cells <- Allcells.data@meta.data %>%
filter(Lineage == "Nr4a2.lineage") %>%
pull(Barcodes)
## Fit principale curve on the data in the spring space
data.Nr4a2 <- as.data.frame(Allcells.data@dr$spring@cell.embeddings[Nr4a2.lineage.cells,1:2])
fit <- principal_curve(as.matrix(data.Nr4a2[,1:2]),
smoother='smooth_spline',
trace=TRUE,
stretch=2)## Starting curve---distance^2: 4868119869
## Iteration 1---distance^2: 734019.9
## Iteration 2---distance^2: 770558.7
## Iteration 3---distance^2: 787358.8
## Iteration 4---distance^2: 794176.1
## Iteration 5---distance^2: 797375.2
## Iteration 6---distance^2: 799004.2
## Iteration 7---distance^2: 800140.8
## Iteration 8---distance^2: 800778.4
Nr4a2.pc.line <- as.data.frame(fit$s[order(fit$lambda),]) #The principal curve smoothed
data.Nr4a2$PseudotimeScore <- fit$lambda/max(fit$lambda)
data.Nr4a2$Cluster <- Allcells.data@meta.data %>%
filter(Lineage == "Nr4a2.lineage") %>%
pull(NewClusterID)
# Direction of the maturation score using Nes expression (reverte if positive correlation)
if (cor(data.Nr4a2$PseudotimeScore, Allcells.data@data['Hmga2', Nr4a2.lineage.cells]) > 0) {
data.Nr4a2$PseudotimeScore <- -(data.Nr4a2$PseudotimeScore - max(data.Nr4a2$PseudotimeScore))
}Nfix.lineage.cells <- Allcells.data@meta.data %>%
filter(Lineage == "Nfix.lineage") %>%
pull(Barcodes)
## Fit principale curve on the data in the spring space
data.Nfix <- as.data.frame(Allcells.data@dr$spring@cell.embeddings[Nfix.lineage.cells,1:2])
fit <- principal_curve(as.matrix(data.Nfix[,1:2]),
smoother='smooth_spline',
trace=TRUE,
stretch=2)## Starting curve---distance^2: 741666427
## Iteration 1---distance^2: 263692.7
## Iteration 2---distance^2: 257894.2
## Iteration 3---distance^2: 257508.9
## Iteration 4---distance^2: 257452.1
Nfix.pc.line <- as.data.frame(fit$s[order(fit$lambda),]) #The principal curve smoothed
data.Nfix$PseudotimeScore <- fit$lambda/max(fit$lambda)
data.Nfix$Cluster <- Allcells.data@meta.data %>%
filter(Lineage == "Nfix.lineage") %>%
pull(NewClusterID)
# Direction of the maturation score using Nes expression (reverte if positive correlation)
if (cor(data.Nfix$PseudotimeScore, Allcells.data@data['Hmga2', Nfix.lineage.cells]) > 0) {
data.Nfix$PseudotimeScore <- -(data.Nfix$PseudotimeScore - max(data.Nfix$PseudotimeScore))
}Pseudotime <- rbind(data.Nr4a2 %>% dplyr::select(PseudotimeScore),
data.Nfix %>% dplyr::select(PseudotimeScore))
Allcells.data@meta.data$Pseudotime <- Pseudotime[rownames(Allcells.data@meta.data),]#Plot Pseudotime score with the 2 principal curves
data <- as.data.frame(Allcells.data@dr$spring@cell.embeddings[,1:2])
data$PseudotimeScore <- Allcells.data@meta.data$Pseudotime
cols <- colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)
ggplot(data, aes(spring1, spring2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Speudotime score') +
geom_line(data=Nfix.pc.line, color="#cc391b", size=0.77) +
geom_line(data=Nr4a2.pc.line, color="#026c9a", size=0.77)#Filter genes
num.cells <- Matrix::rowSums(Allcells.data@raw.data > 0)
genes.use <- names(x = num.cells[num.cells >= 10])
Allcells.data@raw.data <- Allcells.data@raw.data[genes.use, ]
# Normalization and variable gene selection
Allcells.data <- NormalizeData(object = Allcells.data,
normalization.method = "LogNormalize",
scale.factor = round(median(Allcells.data@meta.data$nUMI)),
display.progress = F)
Allcells.data <- FindVariableGenes(object = Allcells.data,
mean.function = ExpMean,
dispersion.function = LogVMR,
x.low.cutoff = 0.03,
x.high.cutoff = 4,
y.cutoff = 1.3, do.plot = F,
display.progress = F)
length(Allcells.data@var.genes)## [1] 926
# Transfert metadata
meta.data <- data.frame(barcode = rownames(Allcells.data@meta.data),
Cluster = Allcells.data@meta.data$Cluster.ident,
Lineage = Allcells.data@meta.data$Lineage,
Pseudotime.Score = Allcells.data@meta.data$Pseudotime,
row.names = rownames(Allcells.data@meta.data))
Annot.data <- new('AnnotatedDataFrame', data = meta.data)
# Transfert count data
count.data = data.frame(gene_short_name = rownames(Allcells.data@raw.data),
row.names = rownames(Allcells.data@raw.data))
feature.data <- new('AnnotatedDataFrame', data = count.data)
# Create the CellDataSet object
gbm_cds <- newCellDataSet(as.matrix(Allcells.data@raw.data),
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 1,
expressionFamily = negbinomial())# Exclude cell cycle associated genes
CCgenes <- as.character(read.table("./Progenitors/CellCycleGenes.csv", sep = "\t", header = F)[,1])
Input.genes <- Allcells.data@var.genes[!Allcells.data@var.genes %in% CCgenes]Speudotime.lineages.diff <- differentialGeneTest(gbm_cds[Input.genes,],
fullModelFormulaStr = "~sm.ns(Pseudotime.Score, df = 3)*Lineage",
reducedModelFormulaStr = "~sm.ns(Pseudotime.Score, df = 3)",
cores = detectCores() - 2)
#Filter based on FDR
Speudotime.lineages.diff.filtered <- Speudotime.lineages.diff %>% filter(qval < 5e-2)We find direction of the DEG by calculating the area between curves (ABC) for branch-dependent genes by adapting the monocle package function calABCs. Genes specific ABC is computed on smoothed expression value over 100 points along the pseudotime
# Create a new pseudo-DV vector of 500 points
nPoints <- 100
new_data = list()
for (Lineage in unique(pData(gbm_cds)$Lineage)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime.Score = seq(min(pData(gbm_cds)$Pseudotime.Score), max(pData(gbm_cds)$Pseudotime.Score), length.out = nPoints), Lineage=Lineage)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
curve_matrix <- genSmoothCurves(gbm_cds[as.character(Speudotime.lineages.diff.filtered$gene_short_name),],
trend_formula = "~sm.ns(Pseudotime.Score, df = 3)*Lineage",
relative_expr = TRUE,
new_data = new_data,
cores= detectCores() - 2)# Extract matrix containing Smooth curves for each lineages
Nr4a2_curve_matrix <- curve_matrix[, 1:nPoints] # the first 100 points correspond to Nr4a2 cells
Nfix_curve_matrix <- curve_matrix[, (nPoints + 1):(2 * nPoints)]
ABCs_res <- Nr4a2_curve_matrix - Nfix_curve_matrix # Direction of the comparison : postive ABCs <=> Upregulated in Nr4a2 lineage
ILR_res <- log2(Nr4a2_curve_matrix/ (Nfix_curve_matrix + 0.1)) # Average logFC between the 2 curves
ABCs_res <- apply(ABCs_res, 1, function(x, nPoints) {
avg_delta_x <- (x[1:(nPoints - 1)] + x[2:(nPoints)])/2
step <- (100/(nPoints - 1))
res <- round(sum(avg_delta_x * step), 3)
return(res)},
nPoints = nPoints) # Compute the area below the curve
ABCs_res <- cbind(ABCs_res, ILR_res[,ncol(ILR_res)])
colnames(ABCs_res)<- c("ABCs", "Endpoint_ILR")
# Import ABC values into the DE test results table
Speudotime.lineages.diff.filtered <- cbind(Speudotime.lineages.diff.filtered[,1:4],
ABCs_res,
Speudotime.lineages.diff.filtered[,5:6])# Extract Nr4a2 neurons trajectory genes
Nr4a2.res <- as.data.frame(Speudotime.lineages.diff.filtered[Speudotime.lineages.diff.filtered$ABCs > 0,])
Nr4a2.genes <- row.names(Nr4a2.res)
Nr4a2_curve_matrix <- Nr4a2_curve_matrix[rownames(Nr4a2_curve_matrix) %in% Nr4a2.genes, ]# Groupe genes in 6 clusters by partitioning round medoids
Nr4a2.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Nr4a2_curve_matrix), method = "spearman"))), k=6)
# Create a dataframe storing DEG test and clustering results
Nr4a2.Gene.dynamique <- data.frame(Gene= names(Nr4a2.genes.clusters$clustering),
pval=Nr4a2.res$pval,
qval=Nr4a2.res$qval,
ABCs=Nr4a2.res$ABCs,
Gene.Clusters= paste0("Clust.", Nr4a2.genes.clusters$clustering)) %>% arrange(Gene.Clusters)
row.names(Nr4a2.Gene.dynamique) <- Nr4a2.Gene.dynamique$Gene
write.table(Nr4a2.Gene.dynamique, "./Nr4a2.Gene.dynamique.csv", sep=";")Sorted.gene.dyn <- Nr4a2.Gene.dynamique %>% arrange(factor(Gene.Clusters, levels = paste0("Clust.",c(3,6,1,2,5,4))))
rownames(Sorted.gene.dyn) <- Sorted.gene.dyn$Gene
anno.colors <- list(lineage = c(Nfix="#cc391b", Nr4a2="#026c9a"))
pheatmap::pheatmap(curve_matrix[as.character(Sorted.gene.dyn$Gene),
c(200:101, #Nfix points
1:100)], #Nr4a2 points
scale = "row",
cluster_rows = F,
cluster_cols = F,
gaps_row = cumsum(as.numeric(table(Sorted.gene.dyn$Gene.Clusters)[paste0("Clust.",c(3,6,1,2,5,4))])),
annotation_row = Sorted.gene.dyn %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Nr4a2", "Nfix"), each=100)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = T,
fontsize_row = 2,
color = rev(brewer.pal(11,"RdBu")),
breaks = seq(-4,4, length.out = 11),
main = "Nr4a2 cells enriched genes expression along pseudotime")Manuscript Fig. 7D
# Extract Nfix neurons trajectory genes
Nfix.res <- as.data.frame(Speudotime.lineages.diff.filtered[Speudotime.lineages.diff.filtered$ABCs < 0,])
Nfix.genes <- row.names(Nfix.res)
Nfix_curve_matrix <- Nfix_curve_matrix[rownames(Nfix_curve_matrix) %in% Nfix.genes, ]# Groupe genes in 6 clusters by partitioning round medoids
Nfix.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Nfix_curve_matrix), method = "spearman"))), k=6)
# Create a dataframe storing DEG test and clustering results
Nfix.Gene.dynamique <- data.frame(Gene= names(Nfix.genes.clusters$clustering),
pval=Nfix.res$pval,
qval=Nfix.res$qval,
ABCs=Nfix.res$ABCs,
Gene.Clusters= paste0("Clust.", Nfix.genes.clusters$clustering)) %>% arrange(Gene.Clusters)
row.names(Nfix.Gene.dynamique) <- Nfix.Gene.dynamique$Gene
write.table(Nfix.Gene.dynamique, "./Nfix.Gene.dynamique.csv", sep=";")Sorted.gene.dyn <- Nfix.Gene.dynamique %>% arrange(factor(Gene.Clusters, levels = paste0("Clust.",c(6,4,1,2,3,5))))
rownames(Sorted.gene.dyn) <- Sorted.gene.dyn$Gene
pheatmap::pheatmap(curve_matrix[as.character(Sorted.gene.dyn$Gene),
c(200:101, #Nfix points
1:100)], #Nr4a2 points
scale = "row",
cluster_rows = F,
cluster_cols = F,
gaps_row = cumsum(as.numeric(table(Sorted.gene.dyn$Gene.Clusters)[paste0("Clust.",c(6,4,1,2,3,5))])),
annotation_row = Sorted.gene.dyn %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Nr4a2", "Nfix"), each=100)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = T,
fontsize_row = 2,
color = rev(brewer.pal(11,"RdBu")),
breaks = seq(-4,4, length.out = 11),
main = "Nfix cells enriched genes expression along pseudotime")Manuscript Fig. 7C
Plot.Genes.trend(Allcells.data,
genes = c("Nfib", "Dbx1",
"Neurod6", "Pbx3",
"Pcp4", "Barhl2"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. 7E
Plot.Genes.trend(Allcells.data,
genes = c("Neurod4", "Neurog2",
"Zfhx3", "Tubb3",
"Hey1", "Mapt"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. 7E
Plot.Genes.trend(Allcells.data,
genes = c("Tbr1", "Emx1",
"Neurod1", "Hey1",
"Neurod2", "Flrt3"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. S7B
Plot.Genes.trend(Allcells.data,
genes = c("Nfia", "Dmrt3",
"Cnr1", "Slc30a10"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. S7B
Plot.Genes.trend(Allcells.data,
genes = c("Epha3", "Pdlim4",
"Tox", "Tfap2c"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. S7B
Plot.Genes.trend(Allcells.data,
genes = c("Epha3", "Pdlim4",
"Tox", "Tfap2c"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. S7B
Plot.Genes.trend(Allcells.data,
genes = c("Neurog1", "Eomes",
"Btg2", "Foxg1",
"Emx2"),
color.by = "lineage",
Use.scale.data = F)Manuscript Fig. S7B
## [1] "04 novembre, 2020, 10,41"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 RColorBrewer_1.1-2 patchwork_0.0.1
## [4] ggExtra_0.9 reshape2_1.4.3 dplyr_0.8.3
## [7] FateID_0.1.9 monocle_2.14.0 DDRTree_0.1.5
## [10] irlba_2.3.3 VGAM_1.1-2 Biobase_2.46.0
## [13] BiocGenerics_0.32.0 princurve_2.1.4 Seurat_2.3.4
## [16] Matrix_1.2-17 cowplot_1.0.0 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-3 backports_1.1.5 Hmisc_4.3-0
## [4] plyr_1.8.4 igraph_1.2.5 lazyeval_0.2.2
## [7] densityClust_0.3 lle_1.1 fastICA_1.2-2
## [10] digest_0.6.25 foreach_1.4.7 htmltools_0.5.0
## [13] viridis_0.5.1 lars_1.2 gdata_2.18.0
## [16] magrittr_1.5 checkmate_1.9.4 cluster_2.1.0
## [19] mixtools_1.1.0 ROCR_1.0-7 limma_3.42.0
## [22] matrixStats_0.55.0 R.utils_2.9.0 docopt_0.6.1
## [25] askpass_1.1 colorspace_1.4-1 ggrepel_0.8.1
## [28] xfun_0.18 sparsesvd_0.2 crayon_1.3.4
## [31] jsonlite_1.7.0 zeallot_0.1.0 survival_2.44-1.1
## [34] zoo_1.8-6 iterators_1.0.12 ape_5.3
## [37] glue_1.4.1 gtable_0.3.0 kernlab_0.9-29
## [40] prabclus_2.3-1 DEoptimR_1.0-8 scales_1.1.0
## [43] pheatmap_1.0.12 som_0.3-5.1 bibtex_0.4.2
## [46] miniUI_0.1.1.1 Rcpp_1.0.5 metap_1.1
## [49] dtw_1.21-3 xtable_1.8-4 viridisLite_0.3.0
## [52] htmlTable_1.13.2 reticulate_1.13 foreign_0.8-72
## [55] bit_4.0.4 proxy_0.4-23 mclust_5.4.5
## [58] SDMTools_1.1-221.1 Formula_1.2-3 tsne_0.1-3
## [61] umap_0.2.3.1 htmlwidgets_1.5.1 httr_1.4.1
## [64] FNN_1.1.3 gplots_3.0.1.1 fpc_2.2-3
## [67] acepack_1.4.1 modeltools_0.2-22 ica_1.0-2
## [70] farver_2.0.1 pkgconfig_2.0.3 R.methodsS3_1.7.1
## [73] flexmix_2.3-15 nnet_7.3-14 locfit_1.5-9.1
## [76] labeling_0.3 later_1.0.0 tidyselect_0.2.5
## [79] rlang_0.4.7 munsell_0.5.0 tools_3.6.3
## [82] ggridges_0.5.1 fastmap_1.0.1 evaluate_0.14
## [85] stringr_1.4.0 yaml_2.2.1 npsurv_0.4-0
## [88] knitr_1.26 bit64_4.0.2 fitdistrplus_1.0-14
## [91] robustbase_0.93-5 caTools_1.17.1.2 randomForest_4.6-14
## [94] purrr_0.3.3 RANN_2.6.1 pbapply_1.4-2
## [97] nlme_3.1-141 mime_0.7 slam_0.1-46
## [100] R.oo_1.23.0 hdf5r_1.3.2.9000 compiler_3.6.3
## [103] rstudioapi_0.11 png_0.1-7 lsei_1.2-0
## [106] tibble_2.1.3 stringi_1.4.6 highr_0.8
## [109] lattice_0.20-41 HSMMSingleCell_1.6.0 vctrs_0.2.0
## [112] pillar_1.4.2 lifecycle_0.1.0 combinat_0.0-8
## [115] Rdpack_0.11-0 lmtest_0.9-37 data.table_1.12.6
## [118] bitops_1.0-6 gbRd_0.4-11 httpuv_1.5.2
## [121] R6_2.4.1 latticeExtra_0.6-28 promises_1.1.0
## [124] KernSmooth_2.23-15 gridExtra_2.3 codetools_0.2-16
## [127] MASS_7.3-53 gtools_3.8.1 assertthat_0.2.1
## [130] openssl_1.4.1 withr_2.1.2 qlcMatrix_0.9.7
## [133] diptest_0.75-7 doSNOW_1.0.18 grid_3.6.3
## [136] rpart_4.1-15 tidyr_1.0.0 class_7.3-17
## [139] rmarkdown_2.5 segmented_1.0-0 Rtsne_0.15
## [142] shiny_1.4.0 snowfall_1.84-6.1 scatterplot3d_0.3-41
## [145] base64enc_0.1-3
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France↩